

* Step 1: list the treated accident events and the month before each accident.
*   Input: the main event-study sample without its control rows, plus the
*   additional IV/placebo sample. The original header calls the treated group
*   "3-6 months"; rows with d==2 are additionally marked as treated here, which
*   is why the variable labels below read 2<=d<=6.

use "$out/data_for_main_regs_event_study_20231203.dta", clear
drop if control==1
append using "$out/additional_data_for_IV_and_placebo_regs_20240416.dta"
replace treated=1 if d==2
* keep treated rows with d<=6 only (a missing d does not satisfy d<=6)
keep if treated==1 & d<=6
* t is the month before tmerge_t; only the person id and t are carried forward
gen t=tmerge_t-1
keep anon t
codebook anon

* Step 2: get their firm (vallazon1) and 2-digit occupation (feor1_h2) in the month before the accident
*   Rows without a match in admin3_alap are kept, with both variables missing.

merge 1:1 anon t using "$in/admin3_alap.dta", keep(master match) keepusing(vallazon1 feor1_h2) nogen

* Step 3: flag those (and do not use later) which are
	* not the first accident with absence in the firm-occupation cell
	* have another accident in the firm-occupation cell within 2 years
	*** Comment: here we neglect ACC with no ABS or longer ABS in firm-occupation cell
* NOTE(review): rows with missing firm or occupation are grouped together as
* one cell by the commands below; they are dropped in step 4 but their flags
* are still written to the saved files.

* earliest t observed in the firm-occupation cell
egen first_acc_in_firmocc=min(t), by(vallazon1 feor1_h2)

* t of the following row within the same cell, ordered by t (missing for the last row of a cell)
bysort vallazon1 feor1_h2 (t): gen next_in_firmocc=t[_n+1]
* 1 if the following accident in the cell is at most 24 months later; a missing
* next_in_firmocc yields 0 because missing compares as larger than 24
gen in2y_in_firmocc=(next_in_firmocc-t<=24)

label var first_acc_in_firmocc "First ACC w 2<=d<=6 in firm-occup cell"
label var next_in_firmocc "pre-month of next ACC w 2<=d<=6 in firm-occup cell"
label var in2y_in_firmocc "There is an ACC w 2<=d<=6 within next 2 years in firm-occup cell"

* NOTE(review): both saves lack the replace option, so a rerun stops with an
* error if the files already exist.
save "$temp/treated_20250704_BM.dta"
sum

* the same data are also written to the output folder
save "$out/list_of_people_with_accidents.dta"

* Step 4: take all the other people working in the same 2-digit occupation at the same firm one month before the accident
*   The accident events are narrowed down first; codebook after each filter
*   reports how many distinct people remain.

* only the first accident of each firm-occupation cell
keep if t==first_acc_in_firmocc
codebook anon
* no further accident in the cell within the following 24 months
drop if in2y_in_firmocc==1
codebook anon
* firm and occupation must be known
keep if vallazon1!=.
codebook anon
keep if feor1_h2!=.
codebook anon

* reduce to the cell-month keys and attach everybody observed in that cell in that month
keep vallazon1 t feor1_h2
merge 1:m vallazon1 t feor1_h2 using "$in/admin3_alap.dta", keep(master match) keepusing(anon) nogen

* take out the person suffering the accident:
* rows found in the treated list (_merge==3) and rows that exist only in the
* treated list (_merge==2) are removed, so only co-workers remain
merge 1:1 vallazon1 t feor1_h2 anon using "$temp/treated_20250704_BM.dta"
keep if _merge==1
drop _merge
gen treated=1
sum
drop first_acc_in_firmocc next_in_firmocc in2y_in_firmocc
codebook anon
* NOTE(review): no replace option here; a rerun fails if the file exists
save "$temp/treated_coworkers_20250704_BM.dta"

* Step 5: assign comparable firm-occupation cells with no accident
	* same size category and occupation
	* make sure that there was no accident in the control firm-occupation cell before and 2 years after
	* only focus on 1-6 months of absence
	* flag the ones with no-absence or longer absence accidents

* create size categories for the treated
*   One row per firm-occupation-month cell; admin_letszam counts the people in
*   the data in memory for that cell.
* NOTE(review): if this runs directly after step 4, the data hold only the
* co-workers, so admin_letszam excludes the injured person, whereas the
* control-cell count further down is taken over everyone in admin3_alap.
* Confirm this asymmetry is intended.
collapse (count) admin_letszam=anon, by(vallazon1 feor1_h2 t)
* calendar year of month t (t=1..12 maps to 2003)
gen ev=2003+int((t-1)/12)
* the firm-year file identifies the firm as vallazon, hence the temporary rename
rename vallazon1 vallazon
merge m:1 vallazon ev using "$in/admin3_nav_eredeti.dta", keep(master match) keepusing(letszam) nogen
rename vallazon vallazon1
* size category from letszam: <10, 10-49, 50-249, 250 and above
gen size_cat=cond(letszam<10, 1, cond(letszam<50, 2, cond(letszam<250, 3, 4))) if letszam!=.
* use employment from admin if no employment info from balance sheet
* (cut-offs 5 / 25 / 125 applied to the cell head-count)
replace size_cat=cond(admin_letszam<5, 1, cond(admin_letszam<25, 2, cond(admin_letszam<125, 3, 4))) if letszam==. & admin_letszam!=.
drop letszam admin_letszam
* NOTE(review): no replace option; a rerun fails if the file exists
save "$temp/treated_firms_20250704_BM.dta"

* add size groups to treated co-workers

use "$temp/treated_coworkers_20250704_BM.dta", clear
merge m:1 vallazon1 t feor1_h2 using "$temp/treated_firms_20250704_BM.dta", keep(master match) keepusing(size_cat) nogen

* overwrite the co-worker file with the version that carries size_cat
save "$temp/treated_coworkers_20250704_BM.dta", replace
 	
* get the potential control firms
*   For every firm-occupation-month cell in admin3_alap, build the same size
*   category as for the treated cells, then pair each cell with the treated
*   cells of the same month, occupation and size category. The result is the
*   list of distinct candidate control cells (vallazon1, feor1_h2, t, size_cat).

use anon t vallazon1 feor1_h2 if vallazon1!=. & feor1_h2!=. using "$in/admin3_alap.dta", clear
collapse (count) admin_letszam=anon, by(vallazon1 feor1_h2 t)
* calendar year of month t (t=1..12 maps to 2003)
gen ev=2003+int((t-1)/12)
* the firm-year file identifies the firm as vallazon, hence the temporary rename
rename vallazon1 vallazon
merge m:1 vallazon ev using "$in/admin3_nav_eredeti.dta", keep(master match) keepusing(letszam)
rename vallazon vallazon1
gen size_cat=1 if letszam<10
replace size_cat=2 if letszam>=10 & letszam<50
replace size_cat=3 if letszam>=50 & letszam<250
replace size_cat=4 if letszam>=250 & letszam!=.
* use employment from admin if no employment info from balance sheet
replace size_cat=1 if letszam==. & admin_letszam<5
replace size_cat=2 if letszam==. & admin_letszam<25 & admin_letszam>=5
replace size_cat=3 if letszam==. & admin_letszam<125 & admin_letszam>=25
replace size_cat=4 if letszam==. & admin_letszam!=. & admin_letszam>=125
drop _merge letszam admin_letszam
rename vallazon1 vallazon_control
* ev is dropped so that the copy coming from the treated-cell file is the only one after joinby
drop ev
* all pairings of a candidate cell with the treated cells sharing month,
* occupation and size category; candidates without any pairing are not kept
joinby t feor1_h2 size_cat using "$temp/treated_firms_20250704_BM.dta"
* label fixed to match the cut-offs coded above (50-249 and 250+, not 50-149 and 150-);
* for firms without letszam the category comes from the admin head-count cut-offs 5/25/125
label var size_cat "1: <10 emp, 2: 10-49 emp, 3: 50-249 emp, 4: 250+ emp"

* drop the pairs in which the same firm is assigned
drop if vallazon_control==vallazon1

* keep one row per candidate control cell: the treated firm id is no longer needed
drop ev
drop vallazon1
duplicates drop
rename vallazon_control vallazon1

* check accidents in the control firms
*   Build, from the treated list, the earliest accident month (mint) of every
*   firm-occupation cell and attach it to the candidate control cells.
preserve
use t vallazon1 feor1_h2 if vallazon1!=. & feor1_h2!=. using "$temp/treated_20250704_BM.dta", clear
collapse (min) mint=t, by(vallazon1 feor1_h2)
* NOTE(review): no replace option; a rerun fails if the file exists
save "$temp/todrop_20250704_BM.dta"
restore
merge m:1 vallazon1 feor1_h2 using "$temp/todrop_20250704_BM.dta"
* cells that appear only in the accident list are not candidate controls
drop if _merge==2

* flag firm-occupation cells with accident ever
gen ACC_w_ABS1_5_firmocc_ever=(_merge==3)
label var ACC_w_ABS1_5_firmocc_ever "firm-occupation cell has an ACC with 2<=d<=6 ever"

* drop firm-occupation cells with an accident before or in the next 2 years:
* the first accident of the cell is no later than t+24
* (a missing mint, i.e. no accident in the cell, gives todrop==0)
gen todrop=(mint<=t+24)
codebook vallazon1
codebook vallazon1 if todrop==1
codebook vallazon1 if ACC_w_ABS1_5_firmocc_ever!=1
drop if todrop==1
drop mint _merge todrop

* check accidents by firm
*   Same source as the cell-level check, but the earliest accident month (mint)
*   is taken per firm; here the cells are only flagged, not dropped.

preserve
use t vallazon1 feor1_h2 if vallazon1!=. & feor1_h2!=. using "$temp/treated_20250704_BM.dta", clear
collapse (min) mint=t, by(vallazon1)
* NOTE(review): no replace option; a rerun fails if the file exists
save "$temp/todrop1_20250704_BM.dta"
restore
merge m:1 vallazon1 using "$temp/todrop1_20250704_BM.dta"
* firms that appear only in the accident list are not candidate controls
drop if _merge==2

* flag firms with accident ever
gen ACC_w_ABS1_5_firm_ever=(_merge==3)
label var ACC_w_ABS1_5_firm_ever "firm has an ACC with 2<=d<=6 ever"

* flag firms with an accident before or in the next 2 years
* (first accident of the firm no later than t+24; missing mint gives 0)
gen ACC_w_ABS1_5_firm_until_tplus24=(mint<=t+24)
label var ACC_w_ABS1_5_firm_until_tplus24 "firm has an ACC with 2<=d<=6 until t+24"
drop mint _merge

* check accident irrespective of absence (ACC with ABS>12 not considered here)
*   Rebuild the accident list from the source files, this time keeping every
*   row with a non-missing d, and store the earliest pre-accident month (mint)
*   of each firm-occupation cell.
* NOTE(review): rows without a match in admin3_alap have missing firm and
* occupation and end up as a "missing" cell in the saved file; they never match
* a candidate control cell, so the flags below are unaffected.
preserve
use "$out/data_for_main_regs_event_study_20231203.dta", clear
drop if control==1
append using "$out/additional_data_for_IV_and_placebo_regs_20240416.dta"
keep if d!=.
keep anon tmerge_t
gen t=tmerge_t-1
drop tmerge_t
merge 1:1 anon t using "$in/admin3_alap.dta", keep(master match) keepusing(vallazon1 feor1_h2) nogen
collapse (min) mint=t, by(vallazon1 feor1_h2)
* NOTE(review): no replace option; a rerun fails if the file exists
save "$temp/toflag_20250704_BM.dta"
restore

* flag firm-occupation cells with an accident up to next 2 years/ever without absence/with a longer absence
merge m:1 vallazon1 feor1_h2 using "$temp/toflag_20250704_BM.dta"
drop if _merge==2

* flag firm-occupation cells with any accident ever
gen ACC_any_firmocc_ever=_merge==3
label var ACC_any_firmocc_ever "firm-occupation cell has any ACC with 1<=d<=12 ever"

* flag firm-occupation cells with any accident before or in the next 2 years
* (first accident of the cell no later than t+24; missing mint gives 0)
gen ACC_any_firmocc_until_tplus24=mint-24<=t
* label fixed: this is the firm-occupation-cell flag; it previously read
* "firm has ...", the same text as the firm-level flag's label
label var ACC_any_firmocc_until_tplus24 "firm-occupation cell has any ACC with 1<=d<=12 until t+24"
drop mint _merge

* flag firms with an accident up to next 2 years/ever without absence/with a longer absence
*   The cell-level list of any-accident months is aggregated to the firm level
*   (earliest month per firm) and attached to the candidate control cells.

preserve
use "$temp/toflag_20250704_BM.dta", clear
collapse (min) mint, by(vallazon1)
* NOTE(review): no replace option; a rerun fails if the file exists
save "$temp/toflag1_20250704_BM.dta"
restore
* fixed: merge the any-accident firm list built just above (toflag1). The
* previous code merged todrop1, the firm list of 2<=d<=6 accidents, so the two
* ACC_any_firm_* flags merely duplicated the ACC_w_ABS1_5_firm_* flags.
merge m:1 vallazon1 using "$temp/toflag1_20250704_BM.dta"
drop if _merge==2

* flag firms with accident ever
gen ACC_any_firm_ever=_merge==3
label var ACC_any_firm_ever "firm has an ACC with 1<=d<=12 ever"

* flag firms with an accident before or in the next 2 years
* (first accident of the firm no later than t+24; missing mint gives 0)
gen ACC_any_firm_until_tplus24=mint-24<=t
label var ACC_any_firm_until_tplus24 "firm has an ACC with 1<=d<=12 until t+24"
drop mint _merge

* report how many distinct firm-occupation cells survive each possible exclusion rule
egen cell=group( vallazon1 feor1_h2)
codebook cell
codebook cell if ACC_w_ABS1_5_firmocc_ever!=1
codebook cell if ACC_w_ABS1_5_firm_ever!=1 
codebook cell if ACC_w_ABS1_5_firm_until_tplus24!=1 
codebook cell if ACC_any_firmocc_ever!=1 
codebook cell if ACC_any_firmocc_until_tplus24!=1 
codebook cell if ACC_any_firm_ever!=1 
codebook cell if ACC_any_firm_until_tplus24!=1
drop cell
	
* Step 6: assign control people working in other firms but in the same occupation in the month before the treatment
*   Every candidate control cell is expanded to the people observed in it in
*   admin3_alap; the treated co-workers are then stacked underneath.

* fixed: the key is spelled out as vallazon1. The previous "vallazon" is not a
* variable in memory and only resolved through variable-name abbreviation,
* which fails under "set varabbrev off" or once another vallazon* variable exists.
merge 1:m vallazon1 t feor1_h2 using "$in/admin3_alap.dta", keep(master match) keepusing(anon) nogen

gen treated=0

append using "$temp/treated_coworkers_20250704_BM.dta"
* the appended co-worker rows do not carry the accident flags; set them to 1
* for treated rows (control rows are left untouched)
local acc_flags ACC_w_ABS1_5_firmocc_ever ACC_w_ABS1_5_firm_ever ACC_w_ABS1_5_firm_until_tplus24 ACC_any_firmocc_ever ACC_any_firmocc_until_tplus24 ACC_any_firm_ever ACC_any_firm_until_tplus24
foreach X of local acc_flags {
	replace `X'=1 if `X'==. & treated==1
}

* Step 7: drop those which are not employed based on our definition
*   kor, used by the age filter in step 8, is loaded in the same pass.

merge 1:1 anon t using "$in/admin3_alap.dta", keep(master match) nogen keepusing(w1 wh1 fogvisz1 kor)
merge 1:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogen
rename passziv_tip1 sick_leave_days

* construct EMP (employment) indicator:
*   firm id present, positive w1 or positive wh1, fewer than 6 sick-leave days
*   (or no sick-leave record), and fogvisz1 equal to 201.
*   EMP is 1 or missing; it is never set to 0.
gen     EMP = 1 if (vallazon1!=.) & ((w1!=. & w1>0) | (wh1!=. & wh1>0)) & (sick_leave_days<6 | sick_leave_days==.) & (fogvisz1==201)
tab EMP treated, m
keep if EMP==1
drop w1 wh1 fogvisz1 sick_leave_days EMP 

* Step 8: drop observations outside our age group 
*   kor is already in memory from the step 7 merge. The former second merge of
*   kor from admin3_alap was a redundant pass over the file: a plain merge keeps
*   the values already in memory, so it could not change kor.

keep if kor>=20 & kor<=50
drop kor

* Step 9: flag those people which are among the potential controls we use before
*   The person ids of the earlier control file are extracted once and merged on.

preserve
* load the person id only, one row per person
use anon using "$out/IVdata_accidents_bnoSext2_SL5_all_controls_nod_2024Apr22.dta", clear
duplicates drop
* NOTE(review): no replace option; a rerun fails if the file exists
save "$temp/controls_20250704_BM.dta"
restore

merge m:1 anon using "$temp/controls_20250704_BM.dta"
* people who appear only in the earlier control file are not part of this sample
drop if _merge==2
gen controls_we_used=(_merge==3)
drop _merge
tab treated

* from here on t is called pre_month
rename t pre_month
label var pre_month "Month before the event"
label var treated "Co-workers of ACC with 2<=d<=6 people"
label var controls_we_used "Potential controls we used before"

* Step 10: create indicators for first events by anon and first treated events by anon

* earliest pre_month of the person over all of their rows
egen first_event=min(pre_month), by(anon)
label var first_event "first event of a person considering both treated and control"
* earliest pre_month over the person's treated rows; filled on treated rows only
egen first_within_treated=min(cond(treated==1, pre_month, .)), by(anon)
replace first_within_treated=. if treated!=1
label var first_within_treated "first treated event of a person"

* Step 11: save the anon-pre_month-level data

order anon pre_month treated vallazon1 feor1_h2 size_cat controls_we_used
sum
* NOTE(review): no replace option; a rerun fails if the file exists
save "$out/co_workers_with_controls_event_level_20250704_BM.dta"

